################################################################################################
########  Volha Charnysh. 2024. Uprooted: How post-WWII Population Transfers Remade Europe. CUP
######### Replication for Chapter 4; analysis in Bavarian municipalities 
######### R version 3.6.3 (2020-02-29) -- "Holding the Windsock"
######### Platform: x86_64-apple-darwin15.6.0 (64-bit)
######################################################################################################
#Note descriptive statistics are reproduced in code for chapter 8 (Bavaria)

rm(list = ls())

library(readstata13)
library(stringr)
library(dplyr)
library(openxlsx)
library(gplots) #for plotmeans
library(sandwich)
library(lme4)
library(lmtest)
library(texreg)
library(ggplot2)
library(maptools)
library(rgdal) #for GIS
library(rgeos) #for GIS
library(raster)
library(sf) #for GIS
library(sp) #for GIS
library(geosphere) #for dist to line
library(reldist) #for gini



####### CREATE FUNCTIONS ########

#function to create formulas
formulas <- function(a, b) {
  formulas <- list()
  n <- 0
    for (i in a) {
      for (j in b) {
        n <- n + 1
        formulas[n] <- paste(i, j)
      }
    }
  return(formulas)
}


#function to run models
run_models <- function(formlist) {
  models <- list()
  n <- 0
  for (i in 1:length(formlist)){
    n <- n + 1
    models[[n]] <- eval(parse(text=formlist[[n]])) 
  }
  return(models)
}

#vcov function 
hc1<-function(x) vcovHC(x, type="HC1")  

#function to extract coefficients + calculate errors
run_coef <- function(output) {
  stdev <- list() 
  n <- 0
  for (i in 1:length(output)){
    n <- n + 1
    stdev[[n]] <- coeftest(output[[n]], vcov. = hc1)
  }
  return(stdev)    
}


##reading in data

dat<-read.xlsx("chapter4.xlsx")

###### CALCCULATE MAIN VARIABLES: 

dat$distBorderEastKM<-dat$distBorderEast/1000

dat$sagri1939<-dat$LandForstBerufszugehorige1939/dat$Pop1939
dat$sagri1950<-dat$pop1950agriculture/dat$pop1950
dat$smale1950<-(dat$pop1950-dat$pop1950female)/dat$pop1950


dat$sfarms05_2ha1939<-dat$Betriebe05_2_1939/dat$BetriebeInsg_1939
dat$sfarms2bis5ha1939<-dat$Betriebe2bis5_1939/dat$BetriebeInsg_1939
dat$sfarms5bis10ha1939<-dat$Betriebe5bis10_1939/dat$BetriebeInsg_1939
dat$sfarms10bis20ha1939<-dat$Betriebe10bis20_1939/dat$BetriebeInsg_1939
dat$sfarms20bis100ha1939<-dat$Betriebe20bis100_1939/dat$BetriebeInsg_1939
dat$sfarmsOver100ha1939<-dat$Betribe100undMehr_1939/dat$BetriebeInsg_1939

#Calculate landholding inequality: 

gini.res1939 <- c()
for (i in 1:nrow(dat)) {
  gini.res1939[i] <- gini(x = c(1.25, 3.5, 7.5, 15, 60, 100),  #these are assumptions
                      weights = as.numeric(as.vector(dat[i, c("sfarms05_2ha1939", "sfarms2bis5ha1939", "sfarms5bis10ha1939", 
                                                              "sfarms10bis20ha1939", "sfarms20bis100ha1939", "sfarmsOver100ha1939")])))
}

dat$land.gini1939 <- gini.res1939


dat$srefugees1950<-dat$refugees1950/dat$pop1950
dat$scath1950<-dat$catholics1950census/dat$pop1950
dat$sprot1950<-dat$protestants1950census/dat$pop1950
dat$sother1950<-1-dat$scath1950-dat$sprot1950

dat$relfrac1950<-1-(dat$scath1950^2+dat$sprot1950^2+dat$sother1950^2)


dat$hebesatzgrundstother1950[which(dat$hebesatzgrundstother1950==0)]<-NA
dat$hebesatzgewerbest1950[which(dat$hebesatzgewerbest1950==0)]<-NA

dat$Stadtkreis<-ifelse(dat$kreis %in% c("Stadtkreise","Stadtkreis"), 1, 0) 

#####################################################################################################################################################
## Note descriptive statistics presented in Table A.3 are based on variables aggregated to contemporary Bavarian municipalities (N=2056).
## See code in chapter8_Bavaria.R to reproduce that table
##################################################################################################################################################


############################################################
################# Table 4.1 TAX RATES: in Bavaria  #########
############################################################


dv<-list("lm(hebesatzgrundstagri1950/100~", "lm(hebesatzgrundstother1950/100~", "lm(hebesatzgewerbest1950/100~", "lm(HebesGrundstA1961/100~", "lm(HebesGrundstB1961/100~", "lm(HebesGewerbest1961/100~")
expvarsV1 <- list("srefugees1950+ relfrac1950 +sagri1950+smale1950+sagri1939+log(pop1950)+land.gini1939+log(distBorderEastKM)+kreis, data=subset(dat, Stadtkreis==0))") 


#create and run formulas + get coefficients
reg.formula1 <- formulas(dv, expvarsV1)
mod <- run_models(reg.formula1) 
mod.cl <- run_coef(mod)


table4.1 <- texreg(
  l = list(mod[[1]], mod[[2]], mod[[3]], mod[[4]], mod[[5]], mod[[6]]),
  override.se=list(mod.cl[[1]][,2], mod.cl[[2]][,2], mod.cl[[3]][,2], mod.cl[[4]][,2], mod.cl[[5]][,2], mod.cl[[6]][,2]),
  override.pval=list(mod.cl[[1]][,4], mod.cl[[2]][,4], mod.cl[[3]][,4], mod.cl[[4]][,4], mod.cl[[5]][,4], mod.cl[[6]][,4]),     
  custom.model.names = c(rep("Tax rates in 1950",3), rep("Tax rates in 1961",3)),
  custom.coef.names=c("Share expellees (1950)","Religious fractionalization (1950)",  "Share in agriculture (1950)",  "Share male (1950)", "Share in agriculture (1939)", "Log(Population in 1950)", "Landholding gini 1939", "Log(Distance to the eastern border)"),
  dcolumn = TRUE,
  booktabs = TRUE,
  stars = c(.01,.05,.1),
  label = "tab:taxrates",
  omit.coef = "kreis|Intercept",
  include.rsquared = FALSE, 
  include.rmse=FALSE
)

table4.1 
